/**************************************************************
This selects two samples: graduates in CTE fields from CCs,
and any student in a two-year school who shows up in a "program"
with at least 10 credits (the code keeps students whose summed
credit hours are >= 10 and who have at least one CTE-flagged
enrollment record).
**************************************************************/

%macro sample_select ; 

/*
 * cte_flag: expands to a boolean data-step expression that evaluates
 * to 1 when DEGCIP_2DIG is one of the two-digit CIP prefixes this
 * program treats as CTE, and to 0 otherwise.  The calling data step
 * must already have assigned DEGCIP_2DIG (it is compared against
 * quoted two-character codes).
 *
 * The whole expression is wrapped in parentheses so that it composes
 * safely with other operators: NOT has higher priority than IN, so an
 * unparenthesized expansion preceded by NOT would negate DEGCIP_2DIG
 * itself instead of the result of the IN test.
 */
%macro cte_flag ; 
(degcip_2dig in ('10','11','12','13','15','19',
                 '43','44','46','47','48','49','51','52'))
%mend cte_flag ;

/*
 * GRADUATES (data step view): degree records match-merged with their
 * PIK records on DQB_SOURCE_ID.  Adds a constant state code, the
 * two-digit CIP prefix, the CTE flag, and the graduation year, quarter
 * and qtime derived from DATE_GRAD.  Only degree levels 01-04 with a
 * graduation year of 2001 or later are kept.
 *
 * NOTE(review): neither input is tested for presence, so a row found in
 * only one of the two inputs still passes through (with the variables
 * of the other input missing) - confirm that is intended.
 */
data graduates (keep= pik degcip_2dig opeid year_grad quarter_grad
                      qtime_grad deglevl_code state flag_cte)
     / view=graduates ;

    /* year_grad on the research file is dropped; it is rebuilt below */
    merge INPUTS.pseo_ny_cuny_2001_2019_deg_rsch (drop=year_grad)
          INPUTS.pseo_ny_cuny_2001_2019_deg_pik ;
    by dqb_source_id ;

    /* constant state code; presumably the New York FIPS code - confirm */
    state = '36' ;

    /* first two characters of the CIP code drive the CTE flag */
    degcip_2dig = substr(degcip, 1, 2) ;
    flag_cte = %cte_flag ;

    /* DATE_GRAD is read as a yymmdd10. string and turned into a SAS date */
    date_grad_num = input(date_grad, yymmdd10.) ;

    quarter_grad = qtr(date_grad_num) ;
    year_grad = year(date_grad_num) ;
    /* qtime macro is defined outside this file */
    qtime_grad = %qtime(year_grad,quarter_grad,location=datastep) ;

    /* keep degree levels 01-04 graduating in 2001 or later */
    if deglevl_code in ('01','02','03','04') and year_grad ge 2001 ;
run ;

/* Diagnostic: one-way frequency of the semester code on the enrollment file */
proc freq data=INPUTS.pseo_ny_cuny_2001_2020_enr_rsch ;
    title 'semester enroll' ;
    tables semenr ;
run ;
   
/*
 * NONGRADUATES (data step view): enrollment records match-merged with
 * their PIK records on DQB_SOURCE_ID (QPIK_SSN is renamed to PIK).
 * Adds a constant state code, copies DEGPUR into DEGLEVL_CODE, and
 * derives the two-digit CIP prefix, numeric year, calendar quarter,
 * CTE flag and numeric credit hours.  Only rows from 2001 onward with
 * degree levels 01-04 are kept.
 *
 * NOTE(review): neither input is tested for presence, so a row found in
 * only one of the two inputs still passes through - confirm that is
 * intended.
 */
data nongraduates (keep= pik degcip_2dig opeid credithours year quarter
                         state cte_flag deglevl_code)
     / view=nongraduates ;

    merge INPUTS.pseo_ny_cuny_2001_2020_enr_rsch
          INPUTS.pseo_ny_cuny_2001_2020_enr_qpik (rename=(qpik_ssn = pik)) ;
    by dqb_source_id ;

    /* constant state code; presumably the New York FIPS code - confirm */
    state = '36' ;
    deglevl_code = degpur ;
    degcip_2dig = substr(degcip, 1, 2) ;
    year = input(yearenr, 4.) ;

    /* semester codes 02/06/09/12 map to quarters 1-4;
       any other code leaves QUARTER missing */
    if semenr = '02' then quarter = 1 ;
    else if semenr = '06' then quarter = 2 ;
    else if semenr = '09' then quarter = 3 ;
    else if semenr = '12' then quarter = 4 ;

    cte_flag = %cte_flag ;

    /* subsetting IF: rows failing it never reach the statement below */
    if year ge 2001 and deglevl_code in ('01','02','03','04') ;

    credithours = input(crdhrs, 4.) ;
run ;

/* Materialize the NONGRADUATES view as NONGRADS, ordered chronologically
   (year, quarter) within each person / school / degree-level group */
proc sort out=nongrads data=nongraduates ;
    by pik opeid deglevl_code year quarter ;
run ;


    
/*
 * One row per person / school / degree level (NWAY keeps only the full
 * class combination).  Despite its name, MAXCRD holds the SUM of credit
 * hours over the group; CTE_FLAG holds the group maximum, i.e. 1 when
 * any enrollment record in the group was CTE-flagged.
 */
proc summary nway data=nongrads ;
    class pik opeid deglevl_code ;
    var credithours cte_flag ;
    output out=maxcrd (drop=_TYPE_ _FREQ_)
           sum(credithours)=maxcrd
           max(cte_flag)=cte_flag ;
run ;
    
/*
 * Order NONGRADS for the BY-merge with MAXCRD on pik / opeid /
 * deglevl_code.  YEAR and QUARTER stay in the sort key on purpose: the
 * following step keeps the LAST row of each pik / opeid / deglevl_code
 * group as the final enrollment, so rows must be in chronological order
 * within the group.  Sorting on the three merge keys alone would leave
 * that order dependent on PROC SORT preserving the prior order of
 * equal-key rows (EQUALS behavior); the full key makes it explicit and
 * still satisfies the three-variable BY statement of the merge.
 */
proc sort data=nongrads ;
    by pik opeid deglevl_code year quarter ; 
run;
    
/*
 * NONGRADUATES_FINALENROLL: the last enrollment row of every person /
 * school / degree-level group whose summed credit hours (MAXCRD) are at
 * least 10 and whose group-level CTE flag is 1.  The row-level CTE flag
 * is dropped on input so that the group-level flag from MAXCRD is the
 * one tested; CREDITHOURS and CTE_FLAG are not written to the output.
 */
data nongraduates_finalenroll (drop=credithours cte_flag) ;
    merge nongrads (drop=cte_flag)
          maxcrd ;
    by pik opeid deglevl_code ;

    /* a missing MAXCRD fails the >= test, so groups absent from MAXCRD
       are excluded as well */
    if maxcrd ge 10 and last.deglevl_code and cte_flag = 1 ;
run ;
            
/* Materialize the GRADUATES view as GRADS with a single row per
   person / school / degree level (NODUPKEY discards the additional
   rows that share the same BY values) */
proc sort nodupkey out=grads data=graduates ;
    by pik opeid deglevl_code ;
run ;
            
/* Diagnostic: distribution of the CTE flag among de-duplicated graduates */
proc freq data=grads ;
    tables flag_cte ;
    title 'CTE FLAG GRADS ' ;
run ;

/*
 * Final sample: graduates combined with final-enrollment rows on
 * person / school / degree level.  GRADUATE is 1 when the key is present
 * in GRADS and 0 otherwise.  Graduates are kept only when their degree
 * is CTE-flagged; rows without a matching graduate record are all kept.
 *
 * NOTE(review): when a key is present in both inputs, same-named
 * variables (e.g. DEGCIP_2DIG, STATE) take the value from the
 * enrollment row, because it is listed last in the MERGE - confirm
 * that is intended.
 */
data OUTPUTS.all_students_cuny ;
    merge grads (in=in_grads)
          nongraduates_finalenroll ;
    by pik opeid deglevl_code ;

    graduate = in_grads ;

    /* a graduate whose CTE flag is 0 or missing is removed */
    if graduate and not flag_cte then delete ;
run ;


%mend sample_select ; 

%sample_select;
